* Project directories. Every path hangs off one root, so relocating the
* project only requires editing the line below.
global root "Q:\dc1prhcmsas01\PU2"

global input  "${root}\data - sas"
global temp   "${root}\temp_stata_wr"
global log    "${root}\Log - Stata WR"
global output "${root}\data_stata_wr"
global graphs "${root}\graphs_stata_wr"
global tables "${root}\tables_stata_wr"

cd "${root}\Code - Stata WR"

* Close any log still open from an earlier run (capture: no error if none is open),
* then start a fresh log for this do-file, overwriting the previous one
capture log close
log using "$log/9_plot_distribution_new_hires_het", replace

* Graph appearance
* NOTE(review): plotplain is not a scheme shipped with Stata; presumably it comes
* from a user-written package that must be installed first - confirm
set scheme plotplain
graph set window fontface "Times New Roman"


*Prep Data:
	*Build, for every firm x month x salaried status, the share of new hires that
	*falls in each base-pay bin, on a fully rectangular bin grid (empty bins = 0).
	
	*Bin layout: fixed-width bins anchored so that one bin edge sits exactly at the cutoff.
	*NOTE(review): 913 is presumably a weekly pay threshold and 96.15 the bin width in
	*the same units - confirm against the paper
	local cutoff 913
	local width 96.15
	
	*Calculate number of workers in each firm across distribution
	use "$temp\new_hires_treat.dta", clear
	keep if yr_month>=mdy(01,01,2016) & yr_month<=mdy(12,01,2017)
	*Lower edge of the bin that contains base, rounded to a whole number
	replace bin=round(floor((base-`cutoff')/`width')*`width'+`cutoff')
	drop if bin<=0 | bin>2000
	
	*Drop if only temporary layoff 
		*Keep only the first month (within the window kept above) in which each
		*worker-firm pair appears, so a later re-appearance is not counted as a hire.
		*NOTE(review): client is presumably a variable abbreviation of client_id - confirm
		egen min_hired=min(yr_month), by(emp_pur_c client)
		keep if min_hired==yr_month
		
	*Collapse
	*n = number of non-missing base values in each firm x month x salaried x bin cell
	collapse (count) n=base, by(yr_month client_id salaried bin)
	
		*Expand to fill in bin
			*tsfill needs an integer time variable, so bin is temporarily converted
			*to its index relative to the cutoff (0 = bin starting at the cutoff)
			gegen group=group(yr_month client_id salaried)
			replace bin=round((bin-`cutoff')/`width')
			tsset group bin  
			tsfill, full
				
			*Rows added by tsfill have missing identifiers: fill them from the
			*observed rows of the same group, first towards higher bins ...
			foreach v of varlist yr_month client_id salaried {
				bysort group: carryforward `v', replace
			}
			
			*... then, with bins in descending order, towards lower bins.
			*NOTE(review): this relies on bysort group leaving the within-group
			*order produced by gsort untouched - confirm
			gsort group -bin 
			foreach v of varlist yr_month client_id salaried {
				bysort group: carryforward `v', replace
			}
				
			drop group
			*Convert the bin index back to pay units (not re-rounded here)
			replace bin=bin*`width'+`cutoff'
			
		*Expand to fill in salaried
			*Same procedure along the salaried dimension, so every
			*firm x month x bin cell has a row for each salaried value in range
			gegen group=group(yr_month client_id bin)
			tsset group salaried 
			tsfill, full
				
			foreach v of varlist yr_month client_id bin {
				bysort group: carryforward `v', replace
			}
				
			gsort group -salaried 
			foreach v of varlist yr_month client_id bin {
				bysort group: carryforward `v', replace
			}
				
			drop group
			
		*Fill in missing outcomes 
			*Cells created by the fills had no hires
			replace n=0 if n==.
	
	*Calculate shares
	*emp = total new hires of the firm in that month and salaried status;
	*share is missing when emp is 0 (division by zero gives missing)
	bysort yr_month client_id salaried: egen emp=total(n)
	gen share=n/emp
	
*Separate firms based on their baseline characteristics
*Each section builds a firm-level 0/1 flag in a tempfile, merges it onto the
*hires data by client_id, discards firms found only in the flag file, and keeps
*the merge result as merge_<flag> so unmatched firms can be excluded later.

	*Bunch 
	*bunch = 1 if the firm's summed n over the 913 and 933 bins
	*(salaried, December 2016) is positive
	preserve 
		use "$output\stayers_distribution_treat.dta", clear 
		keep if yr_month==mdy(12,1,2016) & inlist(bin,913,933) & salaried==1
		collapse (sum) n, by(client_id)
		gen bunch=(n>0)
		keep client_id bunch 
		tempfile bunch_flags
		save `bunch_flags'
	restore 
	merge m:1 client_id using `bunch_flags'
	drop if _merge==2
	rename _merge merge_bunch
	
	*Size 
	*size = 1 if the firm's total n in April 2016 is at or above the median across firms
	preserve 
		keep if yr_month==mdy(4,1,2016)
		collapse (sum) emp2=n, by(client_id)
		egen med_emp=median(emp2)
		gen size=(emp2>=med_emp)
		keep client_id size 
		tempfile size_flags
		save `size_flags'
	restore 
	merge m:1 client_id using `size_flags'
	drop if _merge==2
	rename _merge merge_size
	
	*Turnover
	*sep = 1 for salaried workers present in the 201505 file but absent from the
	*201604 file; turnover = 1 if the firm's mean sep is at or above the median
	preserve 
		use state client_id emp_pur_c salaried using "$temp\cleaned_201505", clear
		merge 1:1 client_id state emp_pur_c using "$temp\cleaned_201604", keepusing(client_id state emp_pur_c)
		drop if _merge==2
		keep if salaried==1
		gen sep=(_merge==1)
		collapse (mean) sep, by(client_id)
		egen med_sep=median(sep)
		gen turnover=(sep>=med_sep)
		keep client_id turnover 
		tempfile turnover_flags
		save `turnover_flags'
	restore 
	merge m:1 client_id using `turnover_flags'
	drop if _merge==2
	rename _merge merge_turnover
	
	save "$temp/hires_het", replace
	
*Program to plot distribution of new hires' base pay
*
*  Syntax:  plot_dist char
*    char   name of a 0/1 firm flag stored in $temp/hires_het together with its
*           merge indicator merge_char (this file calls it with bunch, size, turnover)
*
*  For each value of the flag (1, then 0) the program draws one panel per month
*  showing, for salaried hires, the mean bin share minus the April 2016 mean bin
*  share, and exports the figure as .eps and .jpg to $graphs.
cap program drop plot_dist
program define plot_dist
	args char
	
	*Load the firm-level data, keeping firms that matched the flag file
	use "$temp/hires_het", clear
	keep if merge_`char'==3
	
	*Collapse across firms
	collapse (mean) share, by(yr_month `char' salaried bin)
	drop if bin<=0 | bin>2000
	
	*Difference relative to April 2016
	*The April 2016 cross-section is set aside as share0 and merged back onto every month
	preserve 
		keep if yr_month==mdy(4,01,2016)
		rename share share0 
		drop yr_month
		tempfile base_shares
		save `base_shares'
	restore 
	
	merge m:1 `char' salaried bin using `base_shares', nogenerate
	gen diff_share=share-share0
	*Months elapsed since April 2016 (negative = before)
	gen time=mofd(yr_month)-mofd(mdy(4,01,2016))
	
	*Label each month for graph
	label define time_lab -3 "Jan 2016" -2 "Feb 2016" -1 "Mar 2016" 0 "Apr 2016" 1 "May 2016" 2 "Jun 2016" ///
		3 "Jul 2016" 4 "Aug 2016" 5 "Sep 2016" 6 "Oct 2016" 7 "Nov 2016" 8 "Dec 2016" ///
		9 "Jan 2017" 10 "Feb 2017" 11 "Mar 2017" 12 "Apr 2017" 13 "May 2017" 14 "Jun 2017" ///
		15 "Jul 2017" 16 "Aug 2017" 17 "Sep 2017" 18 "Oct 2017" 19 "Nov 2017" 20 "Dec 2017", replace
	label values time time_lab 
	
	*Plot 
	*One figure per flag value; the value is appended to the file name
	foreach grp in 1 0 {
		graph twoway (connect diff_share bin if salaried==1 & `char'==`grp', by(time, rows(4) note("")) lc(black) mcolor(black)), ///
			xline(455, lcolor(black)) xline(913, lcolor(red)) xtitle("Weekly Base Pay") xscale(off) yline(0) ///
			ytitle("Difference in Share of New Hires Relative to April 2016") ylabel(-0.03(0.01)0.03, nogextend) plotregion(lcolor(black)) 
		graph export "$graphs/fig_panel_hires_salaried_`char'`grp'.eps", replace
		graph export "$graphs/fig_panel_hires_salaried_`char'`grp'.jpg", replace
	}
	 
end 

*Fig A6, A7, and more:	
*One pair of figures (flag = 1 and flag = 0) per firm characteristic
foreach split in bunch size turnover {
	plot_dist `split'
}

*Delete temp files
	*Remove the intermediate dataset that plot_dist reads
	erase "$temp\hires_het.dta"

log close 

